import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
pd.options.plotting.backend = "plotly"
import numpy as np
from am4chart import *
def get_loc(df, ix, default=0, cols=None):
    """Return ``df.loc[ix]`` (optionally narrowed to ``cols``) or ``default``.

    Args:
        df: pandas object supporting label-based ``.loc`` indexing.
        ix: row label to look up (a tuple when the index is a MultiIndex).
        default: value returned when the row lookup fails.
        cols: optional column label(s) selected from the looked-up row(s);
            ignored when falsy.

    Returns:
        The selected row(s)/value(s), or ``default`` when ``ix`` is not
        present in the index (``KeyError``) or the lookup raises ``TypeError``.
    """
    try:
        selected = df.loc[ix]
    except (KeyError, TypeError):
        # A label missing from the index raises KeyError; without catching it
        # the default could never be returned for an absent row.
        return default
    if cols:
        try:
            return selected[cols]
        except TypeError:
            return default
    return selected
def normalizePer(df, col, sum_col, group_col, copy_of=None, new_name=None):
    """Store in ``df`` a column expressed as a percentage of a per-group total.

    For every distinct value of ``group_col`` the values of ``copy_of`` are
    divided by one hundredth of that group's sum of ``sum_col``. The frame is
    modified in place.

    Args:
        df: DataFrame to modify.
        col: default name for both the source and the destination column.
        sum_col: column whose per-group sum is the 100% reference.
        group_col: column defining the groups.
        copy_of: source column; defaults to ``col``.
        new_name: destination column; defaults to ``col``.

    Returns:
        None.
    """
    if not new_name:
        new_name = col
    if not copy_of:
        copy_of = col
    # Start from a float copy so the percentages can be written back without
    # an integer-to-float dtype clash.
    df[new_name] = df[copy_of].astype(float)
    total = df[[group_col, sum_col]].groupby(group_col).sum()
    for xi in total.index:
        tot = total.loc[xi, sum_col]
        # The mask must come from the frame being normalized, not from a
        # module-level DataFrame, otherwise other frames get wrong rows.
        mask = df[group_col] == xi
        df.loc[mask, new_name] = df.loc[mask, new_name].divide(tot / 100)
# Lift the display limits so DataFrames are shown with every column and row.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
# Folder the CSV files are read from.
data_folder = "../data/"
# NOTE(review): Amchart is presumably provided by `from am4chart import *` - confirm.
am4 = Amchart()
# Trace styling applied to the line charts via fig.update_traces.
line_traces = {
    "mode": "lines+markers",
    "line_shape": "spline",
    "line_smoothing": 1,
    "marker_size": 10,
    "marker_opacity": 0.9,
}
All the data in this notebook come from the merge of two databases in Ticino. The merge is still imperfect because of some redundancy (to be fixed in the future).
# Users table and the number of users per starting year.
df_users = pd.read_csv(data_folder + "users.csv")
users_per_year = df_users.groupby("start_year").size().rename("count").reset_index()

# Month number -> short month name.
month_map = dict(enumerate(
    ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    start=1,
))
# Months listed from August to July; used to rank the rows for sorting.
month_order = ['Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul']

# Monthly statistics: missing values become 0 and month numbers become names.
df_months = pd.read_csv(data_folder + "months.csv").fillna(0)
df_months["month"] = df_months["month"].replace(month_map)
df_months["month_order"] = df_months["month"].map(
    {name: position for position, name in enumerate(month_order, start=1)}
)
df_months.sort_values(by=["month_order", "activity_school_year"], inplace=True)
df_months.head()

# Per-user averages: each monthly count divided by n_users_per_year.
y_users = df_months["n_users_per_year"]
df_months["norm_avg_n_user_recipes"] = df_months["n_recipes"] / y_users
df_months["norm_avg_n_user_experiences"] = df_months["n_experiences"] / y_users
df_months["norm_avg_n_activities"] = df_months["n_activities"] / y_users
# Monthly counts of recipes, experiences, feedback responses and edits,
# one facet per school year.
fig = df_months.plot(
    x="month",
    y=["n_recipes", "n_experiences", "n_feedback_responses", "n_edits"],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Activities, feedback requests, feedback responses", x=0.5),
    yaxis_title="count",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
# Optional log scale, left disabled: fig.update_yaxes(type="log")
fig.update_traces(**line_traces)
fig.show()

# Total activities per month.
fig = df_months.plot(
    x="month",
    y=["n_activities"],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Total number of activities per month", x=0.5),
    yaxis_title="# activities",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.update_traces(**line_traces)
fig.show()

# Activities per month divided by the number of users.
fig = df_months.plot(
    x="month",
    y=["norm_avg_n_activities"],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Normalized number of activities per users per month", x=0.5),
    yaxis_title="average activities",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.update_traces(**line_traces)
fig.show()
# Recipes and experiences per user, per month.
fig = df_months.plot.bar(
    x="month",
    y=["norm_avg_n_user_recipes", "norm_avg_n_user_experiences"],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Average number of activities per user per month", x=0.5),
    yaxis_title="average activities",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()

# File counts divided by the number of activities of the same month.
df_months["norm_n_files_recipes"] = df_months["n_files_recipes"] / df_months["n_activities"]
df_months["norm_n_files_experiences"] = df_months["n_files_experiences"] / df_months["n_activities"]
fig = df_months.plot.bar(
    x="month",
    y=["norm_n_files_recipes", "norm_n_files_experiences"],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Number of activities's files per per activity per month", x=0.5),
    yaxis_title="# of files per activity",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()
And the averages grouped by school year:
# Per-school-year mean of the file statistics (bare expression: only shown
# when evaluated interactively).
df_months.groupby("activity_school_year")[["n_files", "avg_n_files", "std_n_files"]].mean()

# Feedback-request percentages as read from the data.
fig = df_months.plot.bar(
    x="month",
    y=[
        "perc_total_feedback_requests_recipes",
        "perc_total_feedback_requests_experiences",
    ],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="% of activities with requests for feedback per month", x=0.5),
    yaxis_title="% activities with request",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()

# Overwrite both columns with each month's share of the school year's total
# n_feedback_requests.
normalizePer(
    df_months,
    col="perc_total_feedback_requests_recipes",
    sum_col="n_feedback_requests",
    group_col="activity_school_year",
    copy_of="n_feedback_requests_recipes",
)
normalizePer(
    df_months,
    col="perc_total_feedback_requests_experiences",
    sum_col="n_feedback_requests",
    group_col="activity_school_year",
    copy_of="n_feedback_requests_experiences",
)
fig = df_months.plot.bar(
    x="month",
    y=[
        "perc_total_feedback_requests_recipes",
        "perc_total_feedback_requests_experiences",
    ],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="% of feedback requests over the school year", x=0.5),
    yaxis_title="% activities with request",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()
TODO: try with request date
# Share of activities in the curriculum, per month and school year.
fig = df_months.plot.bar(
    x="month",
    y=["perc_in_curriculum"],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="% of activities in curriculum per month", x=0.5),
    yaxis_title="% activities in curriculum",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()
TODO: double-check the n_activities query because of the "final". TODO: see whether to multiply the two
# In-curriculum counts expressed as a percentage of the school year's total
# n_activities.
normalizePer(
    df_months,
    col="norm_perc_recipes_in_curriculum",
    sum_col="n_activities",
    group_col="activity_school_year",
    copy_of="n_in_curriculum_recipes",
)
normalizePer(
    df_months,
    col="norm_perc_experiences_in_curriculum",
    sum_col="n_activities",
    group_col="activity_school_year",
    copy_of="n_in_curriculum_experiences",
)
fig = df_months.plot.bar(
    x="month",
    y=[
        "norm_perc_recipes_in_curriculum",
        "norm_perc_experiences_in_curriculum",
    ],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Normalized % of activities in curriculum per month", x=0.5),
    yaxis_title="% activities in curriculum",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()
(total length: description + steps + observations)
# Average total length of an activity, per month.
fig = df_months.plot.bar(
    x="month",
    y=["avg_activity_total_length"],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Activity total length per month", x=0.5),
    yaxis_title="Total lenght average",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()

# Same chart broken down into descriptions, steps and observations.
fig = df_months.plot.bar(
    x="month",
    y=["avg_len_descriptions", "avg_len_steps", "avg_len_observations"],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Activity total length per month", x=0.5),
    yaxis_title="Total lenght average",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()

# Per-school-year mean of the length statistics (bare expression: only shown
# when evaluated interactively).
df_months.groupby("activity_school_year")[
    ["avg_activity_total_length", "std_activity_total_length"]
].mean()
Note: the standard deviation is very high because of NULL descriptions. TODO: try again excluding NULL descriptions.
# Average summed length of the reflections, per month.
fig = df_months.plot.bar(
    x="month",
    y=["avg_sum_len_reflections"],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Average reflections total length per month", x=0.5),
    yaxis_title="Total average lenght average",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()

# Average length of each of the four reflection fields.
fig = df_months.plot.bar(
    x="month",
    y=[
        "avg_len_bilancio",
        "avg_len_competenze",
        "avg_len_miglioramenti",
        "avg_len_critici",
    ],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Total length of average reflections per month", x=0.5),
    yaxis_title="Total lenght of average",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()
# Number of edits per month, one facet per school year.
fig = df_months.plot(x="month", y=["n_edits"], facet_col='activity_school_year')
fig.update_layout(
    # The series plotted is n_edits, so the title and axis label name edits
    # rather than repeating the labels of the activities chart.
    title="Total number of edits per month",
    title_x=0.5,
    yaxis_title="# edits",
    legend_orientation="h", legend=dict(x=0.5, y=-0.1, xanchor='center', yanchor='top'), legend_title_text=''
)
fig.update_xaxes(title=dict(text=""), tickangle=45)
fig.update_traces(line_traces)
fig.show()
# pandas' dt.dayofweek numbers the days from Monday (0) to Sunday (6), so the
# labels must start at Monday.
dayofweek_map = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}
df_students_logins = pd.read_csv(f"{data_folder}students_logins.csv")\
    .drop(['ut_user_type', 'start_semester', 'start_year'], axis=1)
# pd.to_datetime replaces the unit-less astype('datetime64'), which recent
# pandas versions reject.
df_students_logins['date'] = pd.to_datetime(df_students_logins['date'])
df_students_logins.head()
# Count logins per (weekday, hour). Reindexing over the full 7 x 24 grid gives
# slots with no login a count of 0 without relying on exception handling.
login_dates = df_students_logins['date'].dropna()
date_hist_students_logins = (
    login_dates
    .groupby([
        login_dates.dt.dayofweek.rename('dayofweek'),
        login_dates.dt.hour.rename('hour'),
    ])
    .count()
    .reindex(
        pd.MultiIndex.from_product([range(7), range(24)], names=['dayofweek', 'hour']),
        fill_value=0,
    )
    .rename('count')
    .reset_index()
)
date_hist_students_logins['dayofweek'] = date_hist_students_logins['dayofweek'].map(dayofweek_map)
# Hourly login counts, one facet (and color) per weekday.
fig = date_hist_students_logins.plot(
    x="hour",
    y=["count"],
    facet_col="dayofweek",
    color="dayofweek",
)
fig.update_layout(
    title=dict(text="Apprentices logins", x=0.5),
    yaxis_title="# logins",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.update_traces(**line_traces)
# Keep only the part of each facet label that follows the "=" sign.
fig.for_each_annotation(lambda note: note.update(text=note.text.split("=")[1]))
fig.show()

# Same data on a single set of axes, one line per weekday.
fig = date_hist_students_logins.plot(
    x="hour",
    y=["count"],
    line_group="dayofweek",
    color="dayofweek",
)
fig.update_layout(
    title=dict(text="Apprentices logins", x=0.5),
    yaxis_title="# logins",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.update_traces(**line_traces)
fig.show()

# Logins summed over all weekdays, per hour.
fig = (
    date_hist_students_logins
    .groupby("hour")
    .sum()
    .reset_index()
    .plot(x="hour", y=["count"])
)
fig.update_layout(
    title=dict(text="Cumulate apprentices logins", x=0.5),
    yaxis_title="# logins",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.update_traces(**line_traces)
fig.show()
# Monthly supervisor statistics: missing values become 0 and month numbers
# become names.
df_months_supervisors = pd.read_csv(f"{data_folder}months_supervisors.csv").fillna(0).sort_values(by=['month', 'activity_school_year'])
df_months_supervisors.replace({'month': month_map}, inplace=True)
# Rank each row by its own month. Taking the month from another DataFrame
# would pair rows by index label only and can assign the wrong rank.
df_months_supervisors['month_order'] = df_months_supervisors['month'].map(dict(zip(month_order, range(1, 13))))
df_months_supervisors.sort_values(by=['month_order', 'activity_school_year'], inplace=True)
df_months_supervisors.head()
# Express the monthly feedback responses as a percentage of the school year's
# total n_feedback_responses.
normalizePer(
    df_months_supervisors,
    col="perc_total_feedback_responses_recipes",
    sum_col="n_feedback_responses",
    group_col="activity_school_year",
    copy_of="n_feedback_responses_recipes",
)
normalizePer(
    df_months_supervisors,
    col="perc_total_feedback_responses_experiences",
    sum_col="n_feedback_responses",
    group_col="activity_school_year",
    copy_of="n_feedback_responses_experiences",
)
fig = df_months_supervisors.plot.bar(
    x="month",
    y=[
        "perc_total_feedback_responses_recipes",
        "perc_total_feedback_responses_experiences",
    ],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="% of feedback responses over the school year", x=0.5),
    yaxis_title="% of responses",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()
# Responses divided by requests for the same (month, school year).
# The requests live in df_months, so they are looked up by key. Dividing the
# two columns directly would pair rows by index label, which only matches if
# both CSV files list their rows in exactly the same order.
# NOTE(review): assumes both frames share the 'month' and
# 'activity_school_year' values - confirm against the CSV exports.
period_keys = ['month', 'activity_school_year']
requests_by_period = df_months.groupby(period_keys)['n_feedback_requests'].sum()
matched_requests = requests_by_period.reindex(
    pd.MultiIndex.from_frame(df_months_supervisors[period_keys])
).to_numpy()
df_months_supervisors['ration_response'] = (
    df_months_supervisors['n_feedback_responses'] / matched_requests
).fillna(0)
fig = df_months_supervisors.plot.bar(x="month", y=['ration_response'],
                                     facet_col='activity_school_year')
fig.update_layout(
    title="Ratio responses/requests",
    title_x=0.5,
    yaxis_title="responses/requests",
    legend_orientation="h", legend=dict(x=0.5, y=-0.1, xanchor='center', yanchor='top'), legend_title_text=''
)
fig.update_xaxes(title=dict(text=""), tickangle=45)
fig.show()
# Average length of each of the four feedback fields, per month.
fig = df_months_supervisors.plot.bar(
    x="month",
    y=[
        "avg_len_bilancio",
        "avg_len_competenze",
        "avg_len_miglioramenti",
        "avg_len_critici",
    ],
    facet_col="activity_school_year",
)
fig.update_layout(
    title=dict(text="Total length of average feedbacks per month", x=0.5),
    yaxis_title="Total length",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.show()
# Per-supervisor feedback counts; missing values are treated as 0.
df_supervisors_feedback = pd.read_csv(data_folder + "supervisors_feedbacks.csv")
df_supervisors_feedback.fillna(0, inplace=True)
# Sent/received ratio, with values above 1 capped at 1.0.
df_supervisors_feedback["ratio"] = df_supervisors_feedback["sent"].div(
    df_supervisors_feedback["received"]
)
df_supervisors_feedback.loc[df_supervisors_feedback["ratio"].gt(1), "ratio"] = 1.0

fig = df_supervisors_feedback.plot.hist(x="ratio", nbins=40)
fig.update_layout(
    title=dict(text="Histogram: ratio responses/requests per supervisor", x=0.5),
    yaxis_title="Count",
)
fig.show()

# Supervisors whose number of received requests lies within the bounds
# (both inclusive), ordered by ratio.
received_trigger_min = 0
received_trigger_max = 150
df_trigger = df_supervisors_feedback[
    df_supervisors_feedback["received"].between(received_trigger_min, received_trigger_max)
].sort_values(by="ratio")
fig = df_trigger.plot.bar(x="us_user", y="ratio", color="received")
fig.update_layout(
    title=dict(text="Ratio responses/requests per each supervisor", x=0.5),
    xaxis=dict(title="supervisor", type="category"),
)
fig.update_xaxes(showticklabels=False)
fig.show()
# Same selection with a lower bound of 5 received requests.
received_trigger_min = 5
received_trigger_max = 150
df_trigger = df_supervisors_feedback[
    df_supervisors_feedback["received"].between(received_trigger_min, received_trigger_max)
].sort_values(by="ratio")
fig = df_trigger.plot.scatter(x="us_user", y="ratio", color="received")
fig.update_layout(
    title=dict(text="Ratio responses/requests per each supervisor", x=0.5),
    xaxis=dict(title="supervisor", type="category"),
)
fig.update_traces(mode="markers", opacity=0.8)
# Add the first trace of the equivalent bar chart to the scatter figure.
fig.add_trace(df_trigger.plot.bar(x="us_user", y="ratio", color="received").data[0])
fig.update_xaxes(showticklabels=False)
fig.show()

# Ten supervisors with the highest ratio (ties broken by requests received),
# joined with their user name. The final expression is only displayed when
# evaluated interactively.
bests = df_supervisors_feedback.sort_values(
    by=["ratio", "received"], ascending=False
).head(10)
(
    bests.set_index("us_user")
    .join(df_users[["us_user", "user_email", "user_name"]].set_index("us_user"))
    [["user_name", "ratio", "received"]]
)
# Load the per-request feedback information and report how much is missing.
df_feedbacks_info = pd.read_csv(f"{data_folder}activities_feedbacks_info.csv")
df_feedbacks_info.head()
# The ':.2%' format renders the share directly as a percentage; multiplying a
# rounded fraction by 100 can print floating-point noise such as
# 7.000000000000001.
no_answer = df_feedbacks_info['student_grade'].isnull().sum()
print(f'There are {no_answer}/{len(df_feedbacks_info)} without student grade ({no_answer / len(df_feedbacks_info):.2%})')
no_answer = df_feedbacks_info['response_date'].isnull().sum()
print(f'There are {no_answer}/{len(df_feedbacks_info)} without answer ({no_answer / len(df_feedbacks_info):.2%})')
no_answer = df_feedbacks_info['supervisor_grade'].isnull().sum()
print(f'There are {no_answer}/{len(df_feedbacks_info)} without supervisor grade ({no_answer / len(df_feedbacks_info):.2%})')
# Consistency checks between the response date and the supervisor grade.
response_no_grade = (df_feedbacks_info['response_date'].notnull() & df_feedbacks_info['supervisor_grade'].isnull()).sum()
grade_no_response = (df_feedbacks_info['response_date'].isnull() & df_feedbacks_info['supervisor_grade'].notnull()).sum()
print(f'There are {response_no_grade} responses without grades')
grade_no_response  # TODO: verify this!
# Keep only the rows without any missing value, then express the delay in days.
df_feedbacks_info.dropna(inplace=True)
df_feedbacks_info["delay_days"] = df_feedbacks_info["delay_hours"] / 24

# Number of rows per (school year, times_before_answer).
df_hist = (
    df_feedbacks_info
    .groupby(["activity_school_year", "times_before_answer"])["ac_activity"]
    .count()
    .reset_index(name="count")
)
fig = df_hist.plot.hist(
    x="times_before_answer",
    y="count",
    color="activity_school_year",
    histnorm="percent",
)
fig.update_layout(
    barmode="group",
    xaxis=dict(type="category", title="Number of requests before the response"),
    title=dict(text="Requests before a feedback", x=0.5),
    yaxis_title="Percent",
    coloraxis_showscale=False,
)
fig.show()

# Delay before an answer, restricted to delays under 100 days: raw counts.
fig = df_feedbacks_info.loc[df_feedbacks_info["delay_days"].lt(100)].plot.hist(
    x="delay_days",
    nbins=30,
    color="activity_school_year",
)
fig.update_layout(
    barmode="group",
    title=dict(text="Histogram: days before an answer", x=0.5),
    yaxis_title="Count",
    xaxis_title="days before an answer",
    coloraxis_showscale=False,
)
fig.show()

# Same histogram normalized to percentages.
fig = df_feedbacks_info.loc[df_feedbacks_info["delay_days"].lt(100)].plot.hist(
    x="delay_days",
    nbins=30,
    color="activity_school_year",
    histnorm="percent",
)
fig.update_layout(
    barmode="group",
    title=dict(text="Histogram: normalized number of days before an answer", x=0.5),
    yaxis_title="Percent",
    xaxis_title="days before an answer",
    coloraxis_showscale=False,
)
fig.show()

# Edits made after a feedback, restricted to fewer than 5 edits.
fig = df_feedbacks_info.loc[df_feedbacks_info["edits_after"].lt(5)].plot.hist(
    x="edits_after",
    nbins=5,
    color="activity_school_year",
    histnorm="percent",
)
fig.update_layout(
    barmode="group",
    title=dict(text="Histogram: normalized number of edits after a feedback", x=0.5),
    yaxis_title="Percent",
    xaxis_title="# of edits after an answer per feedback request",
)
fig.show()
# 1 when at least one edit followed the feedback, 0 otherwise.
df_feedbacks_info['has_edit_after'] = (df_feedbacks_info['edits_after'] > 0).astype(int)
group_keys = ["activity_school_year", "supervisor_grade"]
df_count = df_feedbacks_info.groupby(group_keys + ["has_edit_after"]).size().reset_index(name='count')
# Every (school year, grade) pair built from the observed values, so that
# pairs without any row appear with zero counts. The public MultiIndex API
# replaces the private pandas.core.reshape cartesian-product helper.
all_pairs = pd.MultiIndex.from_product(
    [sorted(df_count[key].unique()) for key in group_keys], names=group_keys
)
# One row per pair with the edited / not-edited counts side by side. Pivoting
# keeps the two counts aligned by key and still works when only one of the
# two has_edit_after values occurs in the data.
df_feedback_edits = (
    df_count.set_index(group_keys + ["has_edit_after"])['count']
    .unstack("has_edit_after", fill_value=0)
    .reindex(index=all_pairs, columns=[1, 0], fill_value=0)
    .rename(columns={1: 'edit', 0: 'no_edit'})
    .rename_axis(columns=None)
    .reset_index()
)
# Share of edited activities; 0/0 (pair without rows) is reported as 0.
df_feedback_edits['ratio'] = df_feedback_edits['edit'].div(df_feedback_edits['no_edit'] + df_feedback_edits['edit']).fillna(0)
# Edited activities per supervisor grade, one line per school year.
fig = df_feedback_edits.plot(
    x="supervisor_grade",
    y=["edit"],
    line_group="activity_school_year",
    color="activity_school_year",
)
fig.update_layout(
    title=dict(text="# activities that have been edit after a feedback per grade", x=0.5),
    yaxis_title="# activities",
    xaxis_title="supervisor grade",
)
fig.update_traces(**line_traces)
fig.show()
# Supervisor logins, without the user-type and start-period columns.
df_supervisors_logins = pd.read_csv(f"{data_folder}supervisors_logins.csv")\
    .drop(['ut_user_type', 'start_semester', 'start_year'], axis=1)
# pd.to_datetime replaces the unit-less astype('datetime64'), which recent
# pandas versions reject.
df_supervisors_logins['date'] = pd.to_datetime(df_supervisors_logins['date'])
# Count logins per (weekday, hour). Reindexing over the full 7 x 24 grid gives
# slots with no login a count of 0 without relying on exception handling.
login_dates = df_supervisors_logins['date'].dropna()
date_hist_supervisors_logins = (
    login_dates
    .groupby([
        login_dates.dt.dayofweek.rename('dayofweek'),
        login_dates.dt.hour.rename('hour'),
    ])
    .count()
    .reindex(
        pd.MultiIndex.from_product([range(7), range(24)], names=['dayofweek', 'hour']),
        fill_value=0,
    )
    .rename('count')
    .reset_index()
)
# Replace the weekday numbers with their labels.
date_hist_supervisors_logins['dayofweek'] = date_hist_supervisors_logins['dayofweek'].map(dayofweek_map)
# Hourly login counts, one facet (and color) per weekday.
fig = date_hist_supervisors_logins.plot(
    x="hour",
    y=["count"],
    facet_col="dayofweek",
    color="dayofweek",
)
fig.update_layout(
    title=dict(text="Supervisors logins", x=0.5),
    yaxis_title="# logins",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.update_traces(**line_traces)
# Keep only the part of each facet label that follows the "=" sign.
fig.for_each_annotation(lambda note: note.update(text=note.text.split("=")[1]))
fig.show()

# Same data on a single set of axes, one line per weekday.
fig = date_hist_supervisors_logins.plot(
    x="hour",
    y=["count"],
    line_group="dayofweek",
    color="dayofweek",
)
fig.update_layout(
    title=dict(text="Supervisors logins", x=0.5),
    yaxis_title="# logins",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.update_traces(**line_traces)
fig.show()

# Logins summed over all weekdays, per hour.
fig = (
    date_hist_supervisors_logins
    .groupby("hour")
    .sum()
    .reset_index()
    .plot(x="hour", y=["count"])
)
fig.update_layout(
    title=dict(text="Cumulate supervisors logins", x=0.5),
    yaxis_title="# logins",
    legend=dict(
        orientation="h", x=0.5, y=-0.1, xanchor="center", yanchor="top",
        title=dict(text=""),
    ),
)
fig.update_xaxes(title_text="", tickangle=45)
fig.update_traces(**line_traces)
fig.show()
# Hourly totals for supervisors (plt1) and apprentices (plt2).
plt1 = date_hist_supervisors_logins.groupby("hour").sum().reset_index()
plt2 = date_hist_students_logins.groupby("hour").sum().reset_index()
# Both series on one figure with a logarithmic y axis.
fig = go.Figure(
    data=[
        go.Scatter(x=plt1["hour"], y=plt1["count"], name="Supervisors"),
        go.Scatter(x=plt2["hour"], y=plt2["count"], name="Apprentices"),
    ]
)
fig.update_layout(
    title=dict(text="Cumulate logins", x=0.5),
    yaxis=dict(title="# Log10 logins", type="log"),
    xaxis_title="hour of the day",
)
fig.update_traces(**line_traces)
fig.show()
# Yearly statistics summed per (school year, start year).
df_years = pd.read_csv(data_folder + "years_test.csv")
df_years.head()
df_only_years = (
    df_years
    .groupby(["activity_school_year", "start_year"])
    .sum()
    .reset_index()
)
# The school year is converted to text before being used as the color.
df_only_years["activity_school_year"] = df_only_years["activity_school_year"].astype(str)
fig = df_only_years.plot.hist(
    x="start_year",
    y=["avg_n_user_activities"],
    color="activity_school_year",
)
fig.update_layout(
    xaxis_type="category",
    barmode="group",
    title=dict(text="Total number of activities per month", x=0.5),
    yaxis_title="Number of activities",
)
fig.show()
# Number of feedback rows per start year, divided by the number of users who
# started in that same year.
feedbacks_per_year = df_feedbacks_info.groupby('start_year').size().reset_index(name="count")
# Look the user count up by start_year. Dividing the two 'count' columns
# directly pairs rows by position, which is wrong as soon as the two tables
# do not contain exactly the same years.
users_by_year = users_per_year.set_index('start_year')['count']
feedbacks_per_year['norm_count'] = feedbacks_per_year['count'].div(
    feedbacks_per_year['start_year'].map(users_by_year)
)
fig = feedbacks_per_year.plot.scatter(x="start_year", y="norm_count", size="count", color="count")
fig.update_layout(
    title="Normalized feedbacks per year",
    title_x=0.5,
)
fig.update_traces(mode='lines+markers', line_shape='spline', line_smoothing=0.5)
fig.show()